#coding=utf-8
__author__ = 'liangdong'
from numpy import *
import random
import math

"""
Convert the data to matrix format
"""
def loadDataSet(fileName, delim='\t'):
    """Load a delimited text file of numbers into a NumPy matrix.

    Each non-blank line is stripped of surrounding whitespace, split on
    ``delim``, and every resulting field is converted with ``float``.

    Args:
        fileName: Path of the text file to read.
        delim: Field separator used within a line (tab by default).

    Returns:
        A ``numpy.matrix`` with one row per non-blank line of the file.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
    """
    datArr = []
    # The context manager closes the file even when a field fails to parse.
    with open(fileName) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no data.
                continue
            # Build a concrete list of floats: a bare map() is a lazy
            # iterator on Python 3 and would not yield a numeric matrix.
            datArr.append([float(field) for field in stripped.split(delim)])
    # asmatrix is the function that the removed NumPy alias `mat` pointed to.
    return asmatrix(datArr)

"""
Read the rating data
"""
def readData(datafile,delim='\t'):
    """Read rating records from a delimited text file.

    Every non-blank line must split on ``delim`` into exactly four fields.
    The first three are kept (user id, item id, rating); the fourth is
    discarded.

    Args:
        datafile: Path of the rating file to read.
        delim: Field separator used within a line (tab by default).

    Returns:
        A list of ``(userid, itemid, record)`` tuples in file order, where
        ``userid`` and ``itemid`` are the raw strings from the file and
        ``record`` is the third field converted with ``int``.

    Raises:
        ValueError: If a non-blank line does not have exactly four fields
            or its third field is not an integer literal.
    """
    data = []
    # The context manager closes the file even when a line fails to parse.
    with open(datafile) as fr:
        for line in fr:
            if not line.strip():
                # Skip blank lines (e.g. a trailing newline at end of file)
                # instead of failing on the four-way unpack below.
                continue
            userid, itemid, record, _unused = line.split(delim)
            data.append((userid, itemid, int(record)))
    return data

"""
Split the data into a training set and a test set
"""
def spliteData_bak(data, M=3, k=1, seed=10):
    """Randomly split (user, item) pairs into train and test lists.

    The module-level ``random`` generator is re-seeded with ``seed``, then
    one ``random.randint(0, M)`` draw is made per pair; the pair goes to the
    test list when the draw equals ``k`` and to the train list otherwise.
    ``randint`` includes both endpoints, so there are ``M + 1`` possible
    draws.

    Args:
        data: Iterable of two-element ``(user, item)`` entries.
        M: Upper bound (inclusive) of the random draw.
        k: Draw value that sends a pair to the test list.
        seed: Seed applied to the global ``random`` generator.

    Returns:
        A ``(train, test)`` tuple of lists whose elements are
        ``[user, item]`` lists, each in input order.
    """
    random.seed(seed)
    kept, held_out = [], []
    for user, item in data:
        # Exactly one draw per pair keeps the split reproducible for a seed.
        bucket = held_out if random.randint(0, M) == k else kept
        bucket.append([user, item])
    return kept, held_out

"""
Split the data into a training set and a test set
The data is stored as key-value pairs
"""
def spliteData(data, M=3, k=1, seed=10):
    """Randomly split rating triples into nested train and test dicts.

    The module-level ``random`` generator is re-seeded with ``seed``, then
    one ``random.randint(0, M)`` draw is made per triple; the triple goes to
    the test dict when the draw equals ``k`` and to the train dict
    otherwise. ``randint`` includes both endpoints, so there are ``M + 1``
    possible draws.

    Args:
        data: Iterable of ``(user, item, record)`` triples.
        M: Upper bound (inclusive) of the random draw.
        k: Draw value that sends a triple to the test dict.
        seed: Seed applied to the global ``random`` generator.

    Returns:
        A ``(train, test)`` tuple of dicts shaped ``{user: {item: record}}``.
        A later triple with the same user and item in the same dict
        overwrites the earlier record.
    """
    random.seed(seed)
    train_by_user, test_by_user = {}, {}
    for user, item, record in data:
        # Exactly one draw per triple keeps the split reproducible for a seed.
        goes_to_test = random.randint(0, M) == k
        target = test_by_user if goes_to_test else train_by_user
        # Create the user's inner dict on first sight, then store the record.
        target.setdefault(user, {})[item] = record
    return train_by_user, test_by_user